In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [29]:
# Read the air-quality CSV into the working DataFrame `df`.
airquality_csv = 'airquality.csv'
df = pd.read_csv(airquality_csv)
In [30]:
# Index the frame by (Day, Month).
# Guarded so that re-running this cell is a no-op: once Day/Month have been moved
# into the index they are no longer columns, and a second set_index() on the same
# frame would raise KeyError.
index_keys = ['Day', 'Month']
if list(df.index.names) != index_keys:
    df = df.set_index(index_keys)
df.head()
Out[30]:
Ozone Solar.R Wind Temp
Day Month
1 5 41.0 190.0 7.4 67
2 5 36.0 118.0 8.0 72
3 5 12.0 149.0 12.6 74
4 5 18.0 313.0 11.5 62
5 5 NaN NaN 14.3 56
In [11]:
# Pairwise Pearson correlation between the columns of df.
df.corr(method='pearson')
Out[11]:
Ozone Solar.R Wind Temp
Ozone 1.000000 0.348342 -0.601547 0.698360
Solar.R 0.348342 1.000000 -0.056792 0.275840
Wind -0.601547 -0.056792 1.000000 -0.457988
Temp 0.698360 0.275840 -0.457988 1.000000
In [32]:
# Grid of scatter plots: every column of df plotted against every other column.
pair_axes = pd.plotting.scatter_matrix(frame=df, figsize=(10, 10))
plt.show()
In [33]:
# Column names for the header-less census CSV, in file order.
columns = [
    'education', 'age', 'capital-gain', 'race',
    'capital-loss', 'hours-per-week', 'sex', 'classification',
]
census = pd.read_csv('census.csv', header=None, names=columns)

# Any capital-gain entry that cannot be parsed as a number becomes NaN.
census['capital-gain'] = pd.to_numeric(census['capital-gain'], errors='coerce')

# Replace the values of these two columns with their integer category codes.
for label_column in ('sex', 'classification'):
    census[label_column] = census[label_column].astype('category').cat.codes

census.head()
Out[33]:
education age capital-gain race capital-loss hours-per-week sex classification
0 Bachelors 39 2174.0 White 0 40 1 0
1 Bachelors 50 NaN White 0 13 1 0
2 HS-grad 38 NaN White 0 40 1 0
3 11th 53 NaN Black 0 40 1 0
4 Bachelors 28 0.0 Black 0 40 0 0
In [34]:
# Bar chart of the number of rows per race category, in value_counts() order.
race_counts = census.race.value_counts()
plt.bar(race_counts.index, race_counts.values, color=['r', 'g', 'b', 'y', 'k'])
plt.xticks(rotation=90)
plt.title("Race Analysis")
# Write each count just above its bar (200 is the vertical offset in data units).
for bar_position, count in enumerate(race_counts.values):
    plt.text(x=bar_position, y=count + 200, s=f"{count}")
plt.show()
In [35]:
# Bar chart of the number of rows per education level, in value_counts() order.
education_counts = census.education.value_counts()
plt.bar(education_counts.index, education_counts.values, color='green')
# Dotted red reference line at a count of 5000.
plt.axhline(5000, color='red', ls='dotted')
plt.xticks(rotation=90)
plt.title("Education")
# Write each count just above its bar (100 is the vertical offset in data units).
for bar_position, count in enumerate(education_counts.values):
    plt.text(x=bar_position, y=count + 100, s=f"{count}")
plt.show()
In [37]:
plt.figure(figsize=(6, 6))
race_counts = census.race.value_counts()
# Unpack the two objects returned by plt.pie here: the wedge patches and the label texts.
wedges, label_texts = plt.pie(race_counts.values, labels=race_counts.index)
# Hide the second wedge only; its label text is not touched.
wedges[1].set_visible(False)
plt.show()
In [44]:
plt.figure(figsize=(6, 6))
# Leave out the last three entries of the education counts.
education_counts = census.education.value_counts()[:-3]
plt.pie(
    education_counts.values,
    labels=education_counts.index,
    autopct="%0.2f",
    pctdistance=0.80,
)
# A white disc drawn over the centre turns the pie into a donut.
hole = plt.Circle((0, 0), 0.6, facecolor='white')
plt.gca().add_artist(hole)
Out[44]:
<matplotlib.patches.Circle at 0x87996a460>
In [47]:
# Classification counts per sex, in the fixed order
# [sex 1 / class 0, sex 1 / class 1, sex 0 / class 0, sex 0 / class 1].
# value_counts() orders its result by frequency, so the counts are re-indexed by
# class code: without this the positions silently swap whenever class 1 outnumbers
# class 0 for a sex, and a class with no rows would be missing altogether.
np.concatenate([
    census.loc[census.sex == sex_code, 'classification']
    .value_counts()
    .reindex([0, 1], fill_value=0)
    .to_numpy()
    for sex_code in (1, 0)
])
Out[47]:
array([14000,  5778,  8744,  1014], dtype=int64)
In [48]:
plt.figure(figsize=(8, 8))

# The hard-coded labels below assume sex code 1 = Male / 0 = Female and
# classification code 0 = "<50k" / 1 = ">50k".
# NOTE(review): confirm these codes against the raw labels in census.csv.
sex_order = [1, 0]
class_order = [0, 1]

# Outer ring: rows per sex. value_counts() sorts by frequency, so re-index by code
# to keep the slices lined up with the labels regardless of which group is larger.
sex_counts = census.sex.value_counts().reindex(sex_order, fill_value=0)
outer_ring = plt.pie(
    sex_counts.to_numpy(),
    labels=["Male", "Female"],
    autopct="%0.2f",
    pctdistance=0.80,
    startangle=90,
)

# Inner ring: classification counts within each sex, in the same sex order as the
# outer ring so every pair of inner slices sits under its outer slice.
class_counts = np.concatenate([
    census.loc[census.sex == sex_code, 'classification']
    .value_counts()
    .reindex(class_order, fill_value=0)
    .to_numpy()
    for sex_code in sex_order
])
inner_ring = plt.pie(
    class_counts,
    labels=["<50k", ">50k", "<50k", ">50k"],
    autopct="%0.2f",
    radius=0.7,
    pctdistance=0.80,
    startangle=90,
)

# A white disc over the centre turns the stacked pies into a donut.
hole = plt.Circle((0, 0), 0.4, color='white', linewidth=0)
fig = plt.gcf()
fig.gca().add_artist(hole)
plt.show()
In [49]:
# Load the crime records and peek at five randomly chosen rows.
calls = pd.read_csv(filepath_or_buffer='crimes_dataset.csv')
calls.sample(n=5)
Out[49]:
ID CASENO OFFENSE CVLEGEND CVDOW Block_Location BLKADDR City State Day Lat Lon timestamp
3239 3239 17031089 BURGLARY AUTO BURGLARY - VEHICLE 3 100 SPINNAKER WAY\nBerkeley, CA\n(37.870287, -... 100 SPINNAKER WAY Berkeley CA Wednesday 37.870287 -122.316238 31/05/2017 12:45
1690 1690 17035142 THEFT MISD. (UNDER $950) LARCENY 5 2100 SHATTUCK AVE\nBerkeley, CA\n(37.871167, -... 2100 SHATTUCK AVE Berkeley CA Friday 37.871167 -122.268285 16/06/2017 20:30
3730 3730 17090752 BURGLARY AUTO BURGLARY - VEHICLE 6 2400 VIRGINIA ST\nBerkeley, CA\n(37.877548, -1... 2400 VIRGINIA ST Berkeley CA Saturday 37.877548 -122.262710 08/04/2017 16:15
4386 4386 17043052 DISTURBANCE DISORDERLY CONDUCT 1 2700 HILLEGASS AVE\nBerkeley, CA\n(37.861672, ... 2700 HILLEGASS AVE Berkeley CA Monday 37.861672 -122.255992 24/07/2017 13:10
2377 2377 17091125 THEFT MISD. (UNDER $950) LARCENY 1 3100 ELLIS ST\nBerkeley, CA\n(37.852066, -122.... 3100 ELLIS ST Berkeley CA Monday 37.852066 -122.272994 12/06/2017 18:00
In [53]:
# (number of rows, number of columns) of the calls frame.
calls.shape
Out[53]:
(5508, 13)
In [52]:
import folium
import folium.plugins  # The Folium Javascript Map Library

# Map centre as (lat, lon).
# NOTE(review): despite the SF_ prefix, the sampled rows of `calls` are all in
# Berkeley, CA — confirm the intended name.
SF_COORDINATES = (37.87, -122.28)
sf_map = folium.Map(location=SF_COORDINATES, zoom_start=13)

# Keep only the rows where both coordinates are present, as floats.
coords = calls.loc[:, ['Lat', 'Lon']].astype('float').dropna()
locs = coords.to_numpy()
heatmap = folium.plugins.HeatMap(data=locs.tolist(), radius=10)

# Last expression of the cell: attaches the heat-map layer and renders the map.
sf_map.add_child(heatmap)
Out[52]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [53]:
# Take the last 5000 rows, then drop any that lack a position or a legend.
recent_calls = calls[['Lat', 'Lon', 'CVLEGEND']].tail(5000).dropna()

# One marker per remaining row, with the CVLEGEND value as its popup.
cluster = folium.plugins.MarkerCluster()
for lat, lon, legend in zip(recent_calls['Lat'],
                            recent_calls['Lon'],
                            recent_calls['CVLEGEND']):
    marker = folium.Marker([float(lat), float(lon)], popup=legend)
    cluster.add_child(marker)

# A new map is created here, so the cluster is shown on its own.
sf_map = folium.Map(location=SF_COORDINATES, zoom_start=13)
sf_map.add_child(cluster)
sf_map
Out[53]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]: